-
Notifications
You must be signed in to change notification settings - Fork 15.3k
[InstCombine] Inverse is.fpclass mask operand, when profitable #121378
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
The `@llvm.is.fpclass` intrinsic is matched and generated by the InstCombine pass. When the number of set mask bits is greater than the number of the unset bits, it's profitable to replace the `is.fpclass(x, mask)` intrinsic call with a sequence of `!is.fpclass(x, ~mask)` operations. The following IR snippets are semantically equivalent: ```LLVM define i1 @src(float %src) { ; 639 == 0b1001111111 %class = call i1 @llvm.is.fpclass.f32(float %src, i32 639) ret i1 %class } define i1 @tgt(float %src) { ; 384 == 0b0110000000 == ~0b1001111111 & 0x3ff == ~639 & 0x3ff %class = call i1 @llvm.is.fpclass.f32(float %src, i32 384) %not = xor i1 %class, true ret i1 %not } ``` However, the generated code is more efficient for the 2nd IR sequence, at least on some targets. References: * https://alive2.llvm.org/ce/z/kkZhDt * https://godbolt.org/z/5WE8Wb3vz
|
@llvm/pr-subscribers-llvm-transforms Author: Victor Mustya (vmustya) Changes: The `@llvm.is.fpclass` intrinsic is matched and generated by the InstCombine pass. When the number of set mask bits is greater than the number of the unset bits, it's profitable to replace the `is.fpclass(x, mask)` intrinsic call with a sequence of `!is.fpclass(x, ~mask)` operations. The following IR snippets are semantically equivalent:

define i1 @src(float %src) {
; 639 == 0b1001111111
%class = call i1 @llvm.is.fpclass.f32(float %src, i32 639)
ret i1 %class
}

define i1 @tgt(float %src) {
; 384 == 0b0110000000 == ~0b1001111111 & 0x3ff == ~639 & 0x3ff
%class = call i1 @llvm.is.fpclass.f32(float %src, i32 384)
%not = xor i1 %class, true
ret i1 %not
}

However, the generated code is more efficient for the 2nd IR sequence, at least on some targets.

References:
* https://alive2.llvm.org/ce/z/kkZhDt
* https://godbolt.org/z/5WE8Wb3vz

Patch is 67.12 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/121378.diff 7 Files Affected:
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
index e576eea4ca36a1..32c4555677c2d6 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
@@ -1522,9 +1522,21 @@ Value *InstCombinerImpl::foldLogicOfFCmps(FCmpInst *LHS, FCmpInst *RHS,
if (ClassValLHS == ClassValRHS) {
unsigned CombinedMask = IsAnd ? (ClassMaskLHS & ClassMaskRHS)
: (ClassMaskLHS | ClassMaskRHS);
- return Builder.CreateIntrinsic(
- Intrinsic::is_fpclass, {ClassValLHS->getType()},
- {ClassValLHS, Builder.getInt32(CombinedMask)});
+ unsigned InverseCombinedMask = ~CombinedMask & fcAllFlags;
+
+ // If the number of bits set in the combined mask is greater than the
+ // number of the unset bits, it is more efficient to use the inverse
+ // mask and invert the result.
+ bool IsInverse = popcount(CombinedMask) > popcount(InverseCombinedMask);
+ auto *MaskVal =
+ Builder.getInt32(IsInverse ? InverseCombinedMask : CombinedMask);
+
+ auto *II = Builder.CreateIntrinsic(Intrinsic::is_fpclass,
+ {ClassValLHS->getType()},
+ {ClassValLHS, MaskVal});
+ if (IsInverse)
+ return Builder.CreateNot(II);
+ return II;
}
}
}
@@ -1610,10 +1622,25 @@ Instruction *InstCombinerImpl::foldLogicOfIsFPClass(BinaryOperator &BO,
bool IsRHSClass =
match(Op1, m_OneUse(m_Intrinsic<Intrinsic::is_fpclass>(
m_Value(ClassVal1), m_ConstantInt(ClassMask1))));
- if ((((IsLHSClass || matchIsFPClassLikeFCmp(Op0, ClassVal0, ClassMask0)) &&
- (IsRHSClass || matchIsFPClassLikeFCmp(Op1, ClassVal1, ClassMask1)))) &&
+
+ bool IsLHSInverseClass =
+ match(Op0, m_OneUse(m_Not(m_OneUse(m_Intrinsic<Intrinsic::is_fpclass>(
+ m_Value(ClassVal0), m_ConstantInt(ClassMask0))))));
+ bool IsRHSInverseClass =
+ match(Op1, m_OneUse(m_Not(m_OneUse(m_Intrinsic<Intrinsic::is_fpclass>(
+ m_Value(ClassVal1), m_ConstantInt(ClassMask1))))));
+
+ if ((((IsLHSClass || IsLHSInverseClass ||
+ matchIsFPClassLikeFCmp(Op0, ClassVal0, ClassMask0)) &&
+ (IsRHSClass || IsRHSInverseClass ||
+ matchIsFPClassLikeFCmp(Op1, ClassVal1, ClassMask1)))) &&
ClassVal0 == ClassVal1) {
unsigned NewClassMask;
+ if (IsLHSInverseClass)
+ ClassMask0 = ~ClassMask0 & fcAllFlags;
+ if (IsRHSInverseClass)
+ ClassMask1 = ~ClassMask1 & fcAllFlags;
+
switch (BO.getOpcode()) {
case Instruction::And:
NewClassMask = ClassMask0 & ClassMask1;
@@ -4651,10 +4678,17 @@ Instruction *InstCombinerImpl::foldNot(BinaryOperator &I) {
if (II->getIntrinsicID() == Intrinsic::is_fpclass) {
ConstantInt *ClassMask = cast<ConstantInt>(II->getArgOperand(1));
- II->setArgOperand(
- 1, ConstantInt::get(ClassMask->getType(),
- ~ClassMask->getZExtValue() & fcAllFlags));
- return replaceInstUsesWith(I, II);
+ auto ClassMaskValue = ClassMask->getZExtValue();
+ auto InverseMaskValue = ~ClassMaskValue & fcAllFlags;
+
+ // If the number of set bits in the class mask is less than the number of
+ // set bits in the inverse mask, it's more efficient to keep the "not"
+ // instruction instead of inverting the class mask.
+ if (popcount(ClassMaskValue) > popcount(InverseMaskValue)) {
+ II->setArgOperand(
+ 1, ConstantInt::get(ClassMask->getType(), InverseMaskValue));
+ return replaceInstUsesWith(I, II);
+ }
}
}
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
index fd38738e3be80b..2c656a8c98c7a4 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -1050,6 +1050,20 @@ Instruction *InstCombinerImpl::foldIntrinsicIsFPClass(IntrinsicInst &II) {
if (Mask == Known.KnownFPClasses)
return replaceInstUsesWith(II, ConstantInt::get(II.getType(), true));
+ // If the number of set bits in the mask is greater than the number of the
+ // unset bits, it's more efficient to inverse the mask and the intrinsic
+ // result:
+  // is.fpclass(x, mask) -> !is.fpclass(x, ~mask)
+ //
+ auto InverseMask = ~Mask & fcAllFlags;
+ if (popcount<unsigned>(Mask) > popcount<unsigned>(InverseMask)) {
+ auto *NewII =
+ Builder.CreateIntrinsic(Intrinsic::is_fpclass, {Src0->getType()},
+ {Src0, Builder.getInt32(InverseMask)});
+ auto *Not = Builder.CreateNot(NewII);
+ return replaceInstUsesWith(II, Not);
+ }
+
return nullptr;
}
diff --git a/llvm/test/Transforms/InstCombine/combine-is.fpclass-and-fcmp.ll b/llvm/test/Transforms/InstCombine/combine-is.fpclass-and-fcmp.ll
index dcd79f58390023..4b52eb9d34af30 100644
--- a/llvm/test/Transforms/InstCombine/combine-is.fpclass-and-fcmp.ll
+++ b/llvm/test/Transforms/InstCombine/combine-is.fpclass-and-fcmp.ll
@@ -189,7 +189,8 @@ define i1 @fcmp_issubnormal_and_class_finite(half %x) {
define i1 @class_inf_or_fcmp_issubnormal(half %x) {
; CHECK-LABEL: @class_inf_or_fcmp_issubnormal(
-; CHECK-NEXT: [[OR:%.*]] = call i1 @llvm.is.fpclass.f16(half [[X:%.*]], i32 756)
+; CHECK-NEXT: [[TMP1:%.*]] = call i1 @llvm.is.fpclass.f16(half [[X:%.*]], i32 267)
+; CHECK-NEXT: [[OR:%.*]] = xor i1 [[TMP1]], true
; CHECK-NEXT: ret i1 [[OR]]
;
%fabs = call half @llvm.fabs.f16(half %x)
@@ -289,7 +290,8 @@ define i1 @class_normal_or_fcmp_oeq_zero(half %x) {
define i1 @fcmp_ueq_zero_or_class_normal(half %x) {
; CHECK-LABEL: @fcmp_ueq_zero_or_class_normal(
-; CHECK-NEXT: [[CLASS:%.*]] = call i1 @llvm.is.fpclass.f16(half [[X:%.*]], i32 363)
+; CHECK-NEXT: [[TMP1:%.*]] = call i1 @llvm.is.fpclass.f16(half [[X:%.*]], i32 660)
+; CHECK-NEXT: [[CLASS:%.*]] = xor i1 [[TMP1]], true
; CHECK-NEXT: ret i1 [[CLASS]]
;
%ueq.inf = fcmp ueq half %x, 0.0
@@ -300,7 +302,8 @@ define i1 @fcmp_ueq_zero_or_class_normal(half %x) {
define i1 @class_normal_or_fcmp_ueq_zero(half %x) {
; CHECK-LABEL: @class_normal_or_fcmp_ueq_zero(
-; CHECK-NEXT: [[CLASS:%.*]] = call i1 @llvm.is.fpclass.f16(half [[X:%.*]], i32 363)
+; CHECK-NEXT: [[TMP1:%.*]] = call i1 @llvm.is.fpclass.f16(half [[X:%.*]], i32 660)
+; CHECK-NEXT: [[CLASS:%.*]] = xor i1 [[TMP1]], true
; CHECK-NEXT: ret i1 [[CLASS]]
;
%ueq.inf = fcmp ueq half %x, 0.0
diff --git a/llvm/test/Transforms/InstCombine/create-class-from-logic-fcmp.ll b/llvm/test/Transforms/InstCombine/create-class-from-logic-fcmp.ll
index 9a723e8bc89ff5..765596165ea04f 100644
--- a/llvm/test/Transforms/InstCombine/create-class-from-logic-fcmp.ll
+++ b/llvm/test/Transforms/InstCombine/create-class-from-logic-fcmp.ll
@@ -10,7 +10,8 @@
; Base pattern !isfinite(x) || x == 0.0
define i1 @not_isfinite_or_zero_f16(half %x) {
; CHECK-LABEL: @not_isfinite_or_zero_f16(
-; CHECK-NEXT: [[CLASS:%.*]] = call i1 @llvm.is.fpclass.f16(half [[X:%.*]], i32 615)
+; CHECK-NEXT: [[TMP1:%.*]] = call i1 @llvm.is.fpclass.f16(half [[X:%.*]], i32 408)
+; CHECK-NEXT: [[CLASS:%.*]] = xor i1 [[TMP1]], true
; CHECK-NEXT: ret i1 [[CLASS]]
;
%fabs = call half @llvm.fabs.f16(half %x)
@@ -23,7 +24,8 @@ define i1 @not_isfinite_or_zero_f16(half %x) {
; Base pattern x == 0.0 || !isfinite(x)
define i1 @not_isfinite_or_zero_f16_commute_or(half %x) {
; CHECK-LABEL: @not_isfinite_or_zero_f16_commute_or(
-; CHECK-NEXT: [[CLASS:%.*]] = call i1 @llvm.is.fpclass.f16(half [[X:%.*]], i32 615)
+; CHECK-NEXT: [[TMP1:%.*]] = call i1 @llvm.is.fpclass.f16(half [[X:%.*]], i32 408)
+; CHECK-NEXT: [[CLASS:%.*]] = xor i1 [[TMP1]], true
; CHECK-NEXT: ret i1 [[CLASS]]
;
%fabs = call half @llvm.fabs.f16(half %x)
@@ -36,7 +38,8 @@ define i1 @not_isfinite_or_zero_f16_commute_or(half %x) {
; Base pattern !isfinite(x) || x == -0.0
define i1 @not_isfinite_or_zero_f16_negzero(half %x) {
; CHECK-LABEL: @not_isfinite_or_zero_f16_negzero(
-; CHECK-NEXT: [[CLASS:%.*]] = call i1 @llvm.is.fpclass.f16(half [[X:%.*]], i32 615)
+; CHECK-NEXT: [[TMP1:%.*]] = call i1 @llvm.is.fpclass.f16(half [[X:%.*]], i32 408)
+; CHECK-NEXT: [[CLASS:%.*]] = xor i1 [[TMP1]], true
; CHECK-NEXT: ret i1 [[CLASS]]
;
%fabs = call half @llvm.fabs.f16(half %x)
@@ -48,7 +51,8 @@ define i1 @not_isfinite_or_zero_f16_negzero(half %x) {
define i1 @not_isfinite_or_fabs_oeq_zero_f16(half %x) {
; CHECK-LABEL: @not_isfinite_or_fabs_oeq_zero_f16(
-; CHECK-NEXT: [[CLASS:%.*]] = call i1 @llvm.is.fpclass.f16(half [[X:%.*]], i32 615)
+; CHECK-NEXT: [[TMP1:%.*]] = call i1 @llvm.is.fpclass.f16(half [[X:%.*]], i32 408)
+; CHECK-NEXT: [[CLASS:%.*]] = xor i1 [[TMP1]], true
; CHECK-NEXT: ret i1 [[CLASS]]
;
%fabs = call half @llvm.fabs.f16(half %x)
@@ -61,7 +65,8 @@ define i1 @not_isfinite_or_fabs_oeq_zero_f16(half %x) {
; Base pattern !isfinite(x) || x == 0.0
define <2 x i1> @not_isfinite_or_zero_v2f16(<2 x half> %x) {
; CHECK-LABEL: @not_isfinite_or_zero_v2f16(
-; CHECK-NEXT: [[CLASS:%.*]] = call <2 x i1> @llvm.is.fpclass.v2f16(<2 x half> [[X:%.*]], i32 615)
+; CHECK-NEXT: [[TMP1:%.*]] = call <2 x i1> @llvm.is.fpclass.v2f16(<2 x half> [[X:%.*]], i32 408)
+; CHECK-NEXT: [[CLASS:%.*]] = xor <2 x i1> [[TMP1]], splat (i1 true)
; CHECK-NEXT: ret <2 x i1> [[CLASS]]
;
%fabs = call <2 x half> @llvm.fabs.v2f16(<2 x half> %x)
@@ -74,7 +79,8 @@ define <2 x i1> @not_isfinite_or_zero_v2f16(<2 x half> %x) {
; Base pattern !isfinite(x) || x == <0.0, -0.0>
define <2 x i1> @not_isfinite_or_zero_v2f16_pos0_neg0_vec(<2 x half> %x) {
; CHECK-LABEL: @not_isfinite_or_zero_v2f16_pos0_neg0_vec(
-; CHECK-NEXT: [[CLASS:%.*]] = call <2 x i1> @llvm.is.fpclass.v2f16(<2 x half> [[X:%.*]], i32 615)
+; CHECK-NEXT: [[TMP1:%.*]] = call <2 x i1> @llvm.is.fpclass.v2f16(<2 x half> [[X:%.*]], i32 408)
+; CHECK-NEXT: [[CLASS:%.*]] = xor <2 x i1> [[TMP1]], splat (i1 true)
; CHECK-NEXT: ret <2 x i1> [[CLASS]]
;
%fabs = call <2 x half> @llvm.fabs.v2f16(<2 x half> %x)
@@ -87,7 +93,8 @@ define <2 x i1> @not_isfinite_or_zero_v2f16_pos0_neg0_vec(<2 x half> %x) {
; Base pattern x == 0.0 || !isfinite(x)
define <2 x i1> @not_isfinite_or_zero_v2f16_commute_or(<2 x half> %x) {
; CHECK-LABEL: @not_isfinite_or_zero_v2f16_commute_or(
-; CHECK-NEXT: [[CLASS:%.*]] = call <2 x i1> @llvm.is.fpclass.v2f16(<2 x half> [[X:%.*]], i32 615)
+; CHECK-NEXT: [[TMP1:%.*]] = call <2 x i1> @llvm.is.fpclass.v2f16(<2 x half> [[X:%.*]], i32 408)
+; CHECK-NEXT: [[CLASS:%.*]] = xor <2 x i1> [[TMP1]], splat (i1 true)
; CHECK-NEXT: ret <2 x i1> [[CLASS]]
;
%fabs = call <2 x half> @llvm.fabs.v2f16(<2 x half> %x)
@@ -163,7 +170,8 @@ define i1 @not_isfinite_or_zero_f16_not_inf(half %x) {
; Positive test
define i1 @ueq_inf_or_ueq_zero(half %x) {
; CHECK-LABEL: @ueq_inf_or_ueq_zero(
-; CHECK-NEXT: [[CLASS:%.*]] = call i1 @llvm.is.fpclass.f16(half [[X:%.*]], i32 615)
+; CHECK-NEXT: [[TMP1:%.*]] = call i1 @llvm.is.fpclass.f16(half [[X:%.*]], i32 408)
+; CHECK-NEXT: [[CLASS:%.*]] = xor i1 [[TMP1]], true
; CHECK-NEXT: ret i1 [[CLASS]]
;
%fabs = call half @llvm.fabs.f16(half %x)
@@ -237,7 +245,8 @@ define i1 @not_isfinite_or_zero_f16_neg_inf(half %x) {
; Negative test
define i1 @olt_0_or_fabs_ueq_inf(half %x) {
; CHECK-LABEL: @olt_0_or_fabs_ueq_inf(
-; CHECK-NEXT: [[CLASS:%.*]] = call i1 @llvm.is.fpclass.f16(half [[X:%.*]], i32 543)
+; CHECK-NEXT: [[TMP1:%.*]] = call i1 @llvm.is.fpclass.f16(half [[X:%.*]], i32 480)
+; CHECK-NEXT: [[CLASS:%.*]] = xor i1 [[TMP1]], true
; CHECK-NEXT: ret i1 [[CLASS]]
;
%fabs = call half @llvm.fabs.f16(half %x)
@@ -455,7 +464,8 @@ define i1 @negated_isfinite_or_zero_f16_multi_use_cmp1(half %x, ptr %ptr) {
; Negative test
define i1 @negated_isfinite_or_zero_f16_multi_use_cmp0_not_one_inf(half %x) {
; CHECK-LABEL: @negated_isfinite_or_zero_f16_multi_use_cmp0_not_one_inf(
-; CHECK-NEXT: [[NOT_CLASS:%.*]] = call i1 @llvm.is.fpclass.f16(half [[X:%.*]], i32 411)
+; CHECK-NEXT: [[TMP1:%.*]] = call i1 @llvm.is.fpclass.f16(half [[X:%.*]], i32 612)
+; CHECK-NEXT: [[NOT_CLASS:%.*]] = xor i1 [[TMP1]], true
; CHECK-NEXT: ret i1 [[NOT_CLASS]]
;
%fabs = call half @llvm.fabs.f16(half %x)
@@ -509,7 +519,8 @@ define i1 @fcmp_one_0_and_fcmp_une_fabs_inf(half %x) {
define i1 @fcmp_une_0_and_fcmp_une_fabs_inf(half %x) {
; CHECK-LABEL: @fcmp_une_0_and_fcmp_une_fabs_inf(
-; CHECK-NEXT: [[AND:%.*]] = call i1 @llvm.is.fpclass.f16(half [[X:%.*]], i32 411)
+; CHECK-NEXT: [[TMP1:%.*]] = call i1 @llvm.is.fpclass.f16(half [[X:%.*]], i32 612)
+; CHECK-NEXT: [[AND:%.*]] = xor i1 [[TMP1]], true
; CHECK-NEXT: ret i1 [[AND]]
;
%fabs = call half @llvm.fabs.f16(half %x)
@@ -531,7 +542,8 @@ define i1 @fcmp_une_0_and_fcmp_une_neginf(half %x) {
define i1 @issubnormal_or_inf(half %x) {
; CHECK-LABEL: @issubnormal_or_inf(
-; CHECK-NEXT: [[CLASS:%.*]] = call i1 @llvm.is.fpclass.f16(half [[X:%.*]], i32 756)
+; CHECK-NEXT: [[TMP1:%.*]] = call i1 @llvm.is.fpclass.f16(half [[X:%.*]], i32 267)
+; CHECK-NEXT: [[CLASS:%.*]] = xor i1 [[TMP1]], true
; CHECK-NEXT: ret i1 [[CLASS]]
;
%fabs = call half @llvm.fabs.f16(half %x)
@@ -543,7 +555,8 @@ define i1 @issubnormal_or_inf(half %x) {
define i1 @olt_smallest_normal_or_inf(half %x) {
; CHECK-LABEL: @olt_smallest_normal_or_inf(
-; CHECK-NEXT: [[CLASS:%.*]] = call i1 @llvm.is.fpclass.f16(half [[X:%.*]], i32 764)
+; CHECK-NEXT: [[TMP1:%.*]] = call i1 @llvm.is.fpclass.f16(half [[X:%.*]], i32 259)
+; CHECK-NEXT: [[CLASS:%.*]] = xor i1 [[TMP1]], true
; CHECK-NEXT: ret i1 [[CLASS]]
;
%fabs = call half @llvm.fabs.f16(half %x)
@@ -567,7 +580,8 @@ define i1 @not_issubnormal_or_inf(half %x) {
define i1 @issubnormal_uge_or_inf(half %x) {
; CHECK-LABEL: @issubnormal_uge_or_inf(
-; CHECK-NEXT: [[CLASS:%.*]] = call i1 @llvm.is.fpclass.f16(half [[X:%.*]], i32 783)
+; CHECK-NEXT: [[TMP1:%.*]] = call i1 @llvm.is.fpclass.f16(half [[X:%.*]], i32 240)
+; CHECK-NEXT: [[CLASS:%.*]] = xor i1 [[TMP1]], true
; CHECK-NEXT: ret i1 [[CLASS]]
;
%fabs = call half @llvm.fabs.f16(half %x)
@@ -638,7 +652,8 @@ define i1 @issubnormal_or_finite_olt(half %x) {
; inf | nan | zero | subnormal
define i1 @issubnormal_or_finite_uge(half %x) {
; CHECK-LABEL: @issubnormal_or_finite_uge(
-; CHECK-NEXT: [[OR:%.*]] = call i1 @llvm.is.fpclass.f16(half [[X:%.*]], i32 759)
+; CHECK-NEXT: [[TMP1:%.*]] = call i1 @llvm.is.fpclass.f16(half [[X:%.*]], i32 264)
+; CHECK-NEXT: [[OR:%.*]] = xor i1 [[TMP1]], true
; CHECK-NEXT: ret i1 [[OR]]
;
%fabs = call half @llvm.fabs.f16(half %x)
@@ -674,7 +689,8 @@ define i1 @not_zero_and_subnormal(half %x) {
define i1 @fcmp_fabs_uge_inf_or_fabs_uge_smallest_norm(half %x) {
; CHECK-LABEL: @fcmp_fabs_uge_inf_or_fabs_uge_smallest_norm(
-; CHECK-NEXT: [[OR:%.*]] = call i1 @llvm.is.fpclass.f16(half [[X:%.*]], i32 783)
+; CHECK-NEXT: [[TMP1:%.*]] = call i1 @llvm.is.fpclass.f16(half [[X:%.*]], i32 240)
+; CHECK-NEXT: [[OR:%.*]] = xor i1 [[TMP1]], true
; CHECK-NEXT: ret i1 [[OR]]
;
%fabs = call half @llvm.fabs.f16(half %x)
@@ -811,7 +827,8 @@ entry:
define i1 @isnormal_uge_or_zero_oeq(half %x) #0 {
; CHECK-LABEL: @isnormal_uge_or_zero_oeq(
; CHECK-NEXT: entry:
-; CHECK-NEXT: [[OR:%.*]] = call i1 @llvm.is.fpclass.f16(half [[X:%.*]], i32 879)
+; CHECK-NEXT: [[TMP0:%.*]] = call i1 @llvm.is.fpclass.f16(half [[X:%.*]], i32 144)
+; CHECK-NEXT: [[OR:%.*]] = xor i1 [[TMP0]], true
; CHECK-NEXT: ret i1 [[OR]]
;
entry:
@@ -1018,7 +1035,8 @@ define i1 @not_isnormalinf_or_uno_nofabs(half %x) #0 {
; -> ~pnormal
define i1 @not_negisnormalinf_or_inf(half %x) #0 {
; CHECK-LABEL: @not_negisnormalinf_or_inf(
-; CHECK-NEXT: [[OR:%.*]] = call i1 @llvm.is.fpclass.f16(half [[X:%.*]], i32 767)
+; CHECK-NEXT: [[TMP1:%.*]] = call i1 @llvm.is.fpclass.f16(half [[X:%.*]], i32 256)
+; CHECK-NEXT: [[OR:%.*]] = xor i1 [[TMP1]], true
; CHECK-NEXT: ret i1 [[OR]]
;
%fabs = call half @llvm.fabs.f16(half %x)
@@ -1031,7 +1049,8 @@ define i1 @not_negisnormalinf_or_inf(half %x) #0 {
; -> ~pnormal
define i1 @not_negisnormalinf_or_posinf(half %x) #0 {
; CHECK-LABEL: @not_negisnormalinf_or_posinf(
-; CHECK-NEXT: [[OR:%.*]] = call i1 @llvm.is.fpclass.f16(half [[X:%.*]], i32 767)
+; CHECK-NEXT: [[TMP1:%.*]] = call i1 @llvm.is.fpclass.f16(half [[X:%.*]], i32 256)
+; CHECK-NEXT: [[OR:%.*]] = xor i1 [[TMP1]], true
; CHECK-NEXT: ret i1 [[OR]]
;
%not.is.pos.normal.inf = fcmp ult half %x, 0xH0400
@@ -1068,7 +1087,8 @@ define i1 @olt_smallest_normal_or_ord(half %x) #0 {
; -> ~pinf
define i1 @olt_smallest_normal_or_uno(half %x) #0 {
; CHECK-LABEL: @olt_smallest_normal_or_uno(
-; CHECK-NEXT: [[CLASS:%.*]] = call i1 @llvm.is.fpclass.f16(half [[X:%.*]], i32 255)
+; CHECK-NEXT: [[TMP1:%.*]] = call i1 @llvm.is.fpclass.f16(half [[X:%.*]], i32 768)
+; CHECK-NEXT: [[CLASS:%.*]] = xor i1 [[TMP1]], true
; CHECK-NEXT: ret i1 [[CLASS]]
;
%uno = fcmp uno half %x, 0.0
@@ -1138,7 +1158,8 @@ define i1 @olt_infinity_or_finite(half %x) #0 {
; -> zero|subnormal|normal
define i1 @olt_infinity_and_finite(half %x) #0 { ; bustttedddd
; CHECK-LABEL: @olt_infinity_and_finite(
-; CHECK-NEXT: [[CLASS:%.*]] = call i1 @llvm.is.fpclass.f16(half [[X:%.*]], i32 252)
+; CHECK-NEXT: [[TMP1:%.*]] = call i1 @llvm.is.fpclass.f16(half [[X:%.*]], i32 771)
+; CHECK-NEXT: [[CLASS:%.*]] = xor i1 [[TMP1]], true
; CHECK-NEXT: ret i1 [[CLASS]]
;
%lt.infinity = fcmp olt half %x, 0xH7C00
@@ -1235,7 +1256,8 @@ define i1 @olt_infinity_or_ueq_inf(half %x) #0 {
; -> pnormal
define i1 @olt_smallest_normal_or_ueq_inf(half %x) #0 {
; CHECK-LABEL: @olt_smallest_normal_or_ueq_inf(
-; CHECK-NEXT: [[CLASS:%.*]] = call i1 @llvm.is.fpclass.f16(half [[X:%.*]], i32 767)
+; CHECK-NEXT: [[TMP1:%.*]] = call i1 @llvm.is.fpclass.f16(half [[X:%.*]], i32 256)
+; CHECK-NEXT: [[CLASS:%.*]] = xor i1 [[TMP1]], true
; CHECK-NEXT: ret i1 [[CLASS]]
;
%lt.normal = fcmp olt half %x, 0xH0400
@@ -1259,7 +1281,8 @@ define i1 @olt_smallest_normal_or_une_inf(half %x) #0 {
; -> ninf | nnormal | subnormal | zero
define i1 @olt_smallest_normal_and_une_inf(half %x) #0 {
; CHECK-LABEL: @olt_smallest_normal_and_une_inf(
-; CHECK-NEXT: [[CLASS:%.*]] = call i1 @llvm.is.fpclass.f16(half [[X:%.*]], i32 252)
+; CHECK-NEXT: [[TMP1:%.*]] = call i1 @llvm.is.fpclass.f16(half [[X:%.*]], i32 771)
+; CHECK-NEXT: [[CLASS:%.*]] = xor i1 [[TMP1]], true
; CHECK-NEXT: ret i1 [[CLASS]]
;
%lt.normal = fcmp olt half %x, 0xH0400
@@ -1270,7 +1293,8 @@ define i1 @olt_smallest_normal_and_une_inf(half %x) #0 {
define i1 @olt_smallest_normal_and_une_inf_or_oeq_smallest_normal(half %x) #0 {
; CHECK-LABEL: @olt_smallest_normal_and_une_inf_or_oeq_smallest_normal(
-; CHECK-NEXT: [[CLASS:%.*]] = call i1 @llvm.is.fpclass.f16(half [[X:%.*]], i32 252)
+; CHECK-NEXT: [[TMP1:%.*]] = call i1 @llvm.is.fpclass.f16(half [[X:%.*]], i32 771)
+; CHECK-NEXT: [[CLASS:%.*]] = xor i1 [[TMP1]], true
; CHECK-NEXT: ret i1 [[CLASS]]
;
%lt.normal = fcmp olt half %x, 0xH0400
@@ -1283,7 +1307,8 @@ define i1 @olt_smallest_normal_and_une_inf_or_oeq_smallest_normal(half %x) #0 {
define i1 @olt_smallest_normal_and_une_inf_or_one_smallest_normal(half %x) #0 {
; CHECK-LABEL: @olt_smallest_normal_and_une_inf_or_one_smallest_normal(
-; CHECK-NEXT: [[CLASS:%.*]] = call i1 @llvm.is.fpclass.f16(half [[X:%.*]], i32 252)
+; CHECK-NEXT: [[TMP1:%.*]] = call i1 @llvm.is.fpclass.f16(half [[X:%.*]], i32 771)
+; CHECK-NEXT: [[CLASS:%.*]] = xor i1 [[TMP1]], true
; CHECK-NEXT: ret i1 [[CLASS]]
;
%lt.normal = fcmp olt half %x, 0xH0400
@@ -1408,7 +1433,8 @@ define i1 @oeq_neginfinity_or_oeq_smallest_normal(half %x) #0 {
; -> ninf | fcZero | fcSubnormal
define i1 @oeq_neginfinity_or_olt_smallest_normal(half %x) #0 {
; CHECK-LABEL: @oeq_neginfinity_or_olt_smallest_normal(
-; CHECK-NEXT: [[CLASS:%.*]] = call i1 @llvm.is.fpclass.f16(half [[X:%.*]], i32 252)
+; CHECK-NEXT: [[TMP1:%.*]] = call i1 @llvm.is.fpclass.f16(half [[X:%.*]], i32 771)
+; CHECK-NEXT: [[CLASS:%.*]] = xor i1 [[TMP1]], true
; CHECK-NEXT: ret i1 [[CLAS...
[truncated]
|
dtcxzyw
left a comment
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This case should be handled in CodeGen instead of InstCombine.
Fortunately, there is a helper function called invertFPClassTestIfSimpler to improve the codegen of TLI.expandIS_FPCLASS. You can adjust this function and expandIS_FPCLASS to make the codegen better on x86 and aarch64 :) https://godbolt.org/z/vh8476ae3
The `@llvm.is.fpclass` intrinsic is matched and generated by the InstCombine pass. When the number of set mask bits is greater than the number of the unset bits, it's profitable to replace the `is.fpclass(x, mask)` intrinsic call with a sequence of `!is.fpclass(x, ~mask)` operations. The following IR snippets are semantically equivalent:
However, the generated code is more efficient for the 2nd IR sequence,
at least on some targets.
References: